from bs4 import BeautifulSoup as f
import requests
import xlwt
class School():
    """Download a ranking page, extract its table rows and print a summary."""

    def __init__(self):
        pass

    def GetHtml(self, url, timeout=10):
        """Fetch *url* and return the response body decoded as UTF-8.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The page text, or ``None`` when the request fails; in that case
            an error message is printed.
        """
        try:
            # Without a timeout, requests can block forever on a stalled server.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException:
            # RequestException is the base class of HTTPError and also covers
            # connection errors and timeouts, which previously escaped.
            print('对不起，访问错误')
            return None
        response.encoding = 'utf-8'
        return response.text

    def parseHtml(self, content):
        """Extract the cells of every ``<tr class="alt">`` row in *content*.

        Args:
            content: HTML text; ``None`` or an empty string (for example
                after a failed download) yields an empty result.

        Returns:
            A list with one entry per matching row, each a list holding the
            ``.string`` of every ``<td>`` in that row. An entry is ``None``
            when the cell has no single string child.
        """
        if not content:
            return []
        soup = f(content, 'html.parser')
        # A plain string as the second find_all argument filters on CSS class.
        return [
            [cell.string for cell in row.find_all('td')]
            for row in soup.find_all('tr', 'alt')
        ]

    def sortHtml(self, ls):
        """Print the first four columns of every row in *ls* as a text table.

        Args:
            ls: Iterable of rows as produced by ``parseHtml``. Missing or
                ``None`` cells are printed as empty strings instead of
                raising.
        """
        print('排名   学校排名               省市    总分')
        for line in ls:
            # The "s" format spec only accepts strings, so normalise each cell.
            cells = ['' if value is None else str(value) for value in line[:4]]
            # Pad short rows so all four placeholders can be filled.
            cells += [''] * (4 - len(cells))
            print('{0:7s}{1:18s}{2:6s}{3:6s}'.format(*cells))
# Column headers written to the first row of the exported sheet.
TITLE_DATAS = ['排名', '学校名称', '省市', '总分', '就业率', '生源质量', '培养结果', '社会声誉', '科研规模', '顶尖成果', '顶尖人才', '科技服务', '成果转化', '学生国际化']


def build_table(rows, titles=None):
    """Return the header row followed by *rows*, all of equal width.

    Args:
        rows: Iterable of sequences of cell values.
        titles: Header labels; defaults to ``TITLE_DATAS``.

    Returns:
        A list of lists. The first entry is a copy of the header; every
        following entry is the matching input row cut to the header width
        or padded with empty strings up to it.
    """
    header = list(TITLE_DATAS if titles is None else titles)
    width = len(header)
    table = [header]
    for record in rows:
        cells = list(record[:width])
        # Short rows used to raise IndexError; pad them instead.
        cells += [''] * (width - len(cells))
        table.append(cells)
    return table


def save_rankings(rows, path="中国大学排名.xls"):
    """Write the header and *rows* to an .xls workbook saved at *path*.

    The header row is always written, even when *rows* is empty.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("中国最好大学排名")
    for row_index, record in enumerate(build_table(rows)):
        for col_index, value in enumerate(record):
            sheet.write(row_index, col_index, value)
    workbook.save(path)


if __name__ == "__main__":
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html'
    school = School()
    content = school.GetHtml(url)
    if content is None:
        # GetHtml returns None after printing its error message; there is
        # nothing to parse or export, so stop with a failure status.
        raise SystemExit(1)
    ls = school.parseHtml(content)
    # The export runs inside the guard so importing this module no longer
    # fails on the undefined name ``ls``.
    save_rankings(ls)